1 Load packages

library("tidytext")       # for text data
library("emmeans")        # for comparing models
library("knitr")          # for knitting  
library("brms")           # for Bayesian data analysis
library("janitor")        # for cleaning variable names
library("patchwork")      # for combining plots
library("tidyverse")      # for everything else 

2 Settings

# set theme
theme_set(theme_classic())

# suppress warnings about grouping 
options(dplyr.summarise.inform = F)

# suppress warnings and messages 
knitr::opts_chunk$set(echo = T, warning = F, message = F)

3 EXPERIMENT 1 (Basic-level)

4 DATA

4.1 Read in data

df.exp1 = read.csv("../../data/exp1/exp1.csv")

4.2 Wrangle

df.exp1 = df.exp1 %>%
  filter(include == 1) %>% 
  pivot_longer(cols = (bag: snail),
               names_to = "item",
               values_to = "rating",
               values_drop_na = TRUE) %>%
  relocate(participant_number, condition, item, rating, age, gender, lan_spoken) %>%
  mutate(kind = case_when(item == "bag" ~ 0,
                          item == "shoe" ~ 0,
                          item == "clock" ~ 0,
                          item == "phone" ~ 0,
                          item == "drum" ~ 0,
                          item == "spider" ~ 1,
                          item == "tree" ~ 1,
                          item == "snail" ~ 1)) %>%
  mutate(kind = factor(kind,
                       levels = c("0", "1"),
                       labels = c("artifact", "biological")))

# warm-up items
df.exp1.warmup = df.exp1 %>% 
  filter(item %in% c("bag", "shoe"))

#test items
df.exp1 = df.exp1 %>% 
  filter(!item %in% c("bag", "shoe"))

4.3 Demographics

  # gender
  df.exp1 %>%
    group_by(gender) %>%
    summarise(n = n_distinct(participant_number)) %>%
    print()
## # A tibble: 3 × 2
##   gender       n
##   <chr>    <int>
## 1 "female"    86
## 2 "male"      92
## 3 "male "      1
  # language spoken
df.exp1 %>%
    mutate(en_occurrence = ifelse(str_detect(lan_spoken, "en"), "en", "non-en")) %>%
    group_by(en_occurrence) %>%
    summarise(n = n_distinct(participant_number)) %>%
    print()
## # A tibble: 2 × 2
##   en_occurrence     n
##   <chr>         <int>
## 1 en              172
## 2 non-en            6

5 STATS

5.1 Bayesian Regression

# sum coding
options(contrasts = c("contr.sum","contr.poly"))

df.model = df.exp1 %>% 
  mutate(condition = as_factor(condition),
         condition= fct_relevel(condition, "purpose"))

# Check contrast values
 contrasts(df.model$condition)
##            [,1] [,2]
## purpose       1    0
## label_only    0    1
## irrelevant   -1   -1
fit.brm.basic_level = brm(formula = rating ~ 1 + condition + (1 | participant_number) + (1 | item), 
                     data = df.model,
                     family = "bernoulli",
                     seed = 1,
                     iter = 8000,
                     warmup = 1000,
                     cores = 8,
                     control = list(adapt_delta = .999999,
                                    max_treedepth = 15),
                     file = "cache/fit.brm.basic_level") 

fit.brm.basic_level
##  Family: bernoulli 
##   Links: mu = logit 
## Formula: rating ~ 1 + condition + (1 | participant_number) + (1 | item) 
##    Data: df.model (Number of observations: 1080) 
##   Draws: 4 chains, each with iter = 8000; warmup = 1000; thin = 1;
##          total post-warmup draws = 28000
## 
## Multilevel Hyperparameters:
## ~item (Number of levels: 6) 
##               Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## sd(Intercept)     0.25      0.18     0.02     0.70 1.00     6924     8535
## 
## ~participant_number (Number of levels: 178) 
##               Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## sd(Intercept)     0.70      0.12     0.47     0.94 1.00     9286    12959
## 
## Regression Coefficients:
##            Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## Intercept      0.10      0.15    -0.20     0.40 1.00    15424    14750
## condition1     0.87      0.13     0.63     1.12 1.00    20955    20549
## condition2    -0.57      0.12    -0.81    -0.34 1.00    21520    20793
## 
## Draws were sampled using sampling(NUTS). For each parameter, Bulk_ESS
## and Tail_ESS are effective sample size measures, and Rhat is the potential
## scale reduction factor on split chains (at convergence, Rhat = 1).

5.2 Hypotheis test of condition differneces and differnece from chance

fit.brm.basic_level %>%
  emmeans(pairwise ~ condition)
## $emmeans
##  condition  emmean lower.HPD upper.HPD
##  purpose     0.965     0.573     1.357
##  label_only -0.474    -0.867    -0.113
##  irrelevant -0.199    -0.575     0.186
## 
## Point estimate displayed: median 
## Results are given on the logit (not the response) scale. 
## HPD interval probability: 0.95 
## 
## $contrasts
##  contrast                estimate lower.HPD upper.HPD
##  purpose - label_only       1.437     1.018     1.862
##  purpose - irrelevant       1.161     0.740     1.581
##  label_only - irrelevant   -0.274    -0.685     0.124
## 
## Point estimate displayed: median 
## Results are given on the log odds ratio (not the response) scale. 
## HPD interval probability: 0.95

5.3 Test of kind

fit.brm.basic_level.kind = brm(formula = rating ~ 1 + kind + (1 | participant_number) + (1 | item), 
                     data = df.model,
                     family = "bernoulli",
                     seed = 1,
                     iter = 8000,
                     warmup = 1000,
                     cores = 8,
                     control = list(adapt_delta = .999999,
                                    max_treedepth = 15),
                     file = "cache/fit.brm.basic_leve.kindl") 

fit.brm.basic_level.kind
##  Family: bernoulli 
##   Links: mu = logit 
## Formula: rating ~ 1 + kind + (1 | participant_number) + (1 | item) 
##    Data: df.model (Number of observations: 1080) 
##   Draws: 4 chains, each with iter = 8000; warmup = 1000; thin = 1;
##          total post-warmup draws = 28000
## 
## Multilevel Hyperparameters:
## ~item (Number of levels: 6) 
##               Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## sd(Intercept)     0.20      0.19     0.01     0.69 1.00     6788     9912
## 
## ~participant_number (Number of levels: 178) 
##               Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## sd(Intercept)     0.95      0.12     0.73     1.19 1.00    10870    17067
## 
## Regression Coefficients:
##           Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## Intercept     0.09      0.15    -0.21     0.37 1.00    16089    15202
## kind1         0.16      0.13    -0.10     0.42 1.00    15735    10645
## 
## Draws were sampled using sampling(NUTS). For each parameter, Bulk_ESS
## and Tail_ESS are effective sample size measures, and Rhat is the potential
## scale reduction factor on split chains (at convergence, Rhat = 1).

#PLOT

5.4 Condition

df.exp1 = read.csv("../../data/exp1/exp1.csv")

df.plot = df.exp1 %>%
  filter(include == 1) %>% 
  select(-bag, -shoe) %>%
  mutate(across(.cols = clock : snail, .fns = as.numeric)) %>%
  mutate(total = clock + drum + phone + tree + spider + snail,
         percent = (total / 6) * 100) %>% 
  mutate(condition = str_replace_all(condition, "_", " ")) %>% 
  mutate(condition = factor(condition, levels = c("label only", "irrelevant", "purpose"))) %>% 
  select(participant_number, condition, percent) 

p1 = ggplot(data = df.plot,
         mapping = aes(x = condition,
                       y = percent,
                       group = condition,
                       color = condition,
                       fill = condition)) + 
    stat_summary(fun = mean, 
                 geom = "bar", 
                 position = position_dodge(width = 0.5), 
                 alpha = .9,
                 color = "black",
                 size = 1) +
     geom_hline(yintercept = 50, linetype="dashed", 
               color = "black", size=.5) +
     geom_point(aes(color = condition,
                   fill = condition),
               position = position_jitterdodge(dodge.width = 0.8,
                                               jitter.width = 0.5,
                                               jitter.height = 0.0),
               shape = 21,
               size = 2,
               color = "grey30",
               alpha = .4) +
    stat_summary(fun.data = mean_cl_boot, 
                 geom = "linerange", 
                 position = position_dodge(width = 0.5),
                 color = "black",
                 size = 1.2) +
 scale_fill_manual(values = c("label only" = "#377EB8", "irrelevant" = "#E41A1C", "purpose" = "#4DAF4A"), breaks=c("label only", "irrelevant", "purpose")) +
  scale_color_manual(values = c("label only" = "#377EB8", "irrelevant" = "#E41A1C", "purpose" = "#4DAF4A"), breaks=c("label only", "irrelevant", "purpose")) +
   scale_y_continuous(breaks = seq(0, 100, 25),
                        labels = str_c(seq(0, 100, 25), "%"),
                       limits = c(0, 100)) +
    labs(y = "Probability of selecting \n\ taxonomic match") +
    theme(axis.title.x = element_blank(),
          axis.title.y = element_text(size=18),
          axis.text.y = element_text(size = 14),
          legend.title = element_blank(),
          legend.text = element_text(size = 14),
          legend.position = "bottom",
          axis.text.x = element_blank(),
          plot.title = element_text(hjust = 0.5, face="bold", size=26, margin = margin(t = 20, b = 20))) +
    ggtitle("Overall")

p1

ggsave("../../figures/exp1/exp1_condition_bar.pdf",
       height = 5, 
       width = 8)

5.5 Kind by condition

df.exp1 = read.csv("../../data/exp1/exp1.csv")

df.plot = df.exp1 %>%
  filter(include == 1) %>% 
  select(-bag, -shoe) %>%
  pivot_longer(cols = (clock: snail),
               names_to = "item",
               values_to = "rating",
               values_drop_na = TRUE) %>%
  relocate(participant_number, condition, item, rating, age) %>%
  select(participant_number: age) %>%
  mutate(kind = case_when(item == "clock" ~ 0,
                          item == "phone" ~ 0,
                          item == "drum" ~ 0,
                          item == "spider" ~ 1,
                          item == "tree" ~ 1,
                          item == "snail" ~ 1)) %>%
  mutate(kind = factor(kind,
                       levels = c("0", "1"),
                       labels = c("artifacts", "biological kinds"))) %>% 
  group_by(participant_number, condition, kind) %>%
  summarise(total = sum(rating),
            percent = (total / 3) * 100) %>% 
  mutate(condition = str_replace_all(condition, "_", " ")) %>% 
  mutate(condition = factor(condition, levels = c("label only", "irrelevant", "purpose"))) %>% 
  select(participant_number, condition, kind, total, percent)



p2 = ggplot(data = df.plot,
         mapping = aes(x = condition,
                       y = percent,
                       group = kind,
                       color = kind,
                       fill = kind)) + 
    stat_summary(fun = mean, 
                 geom = "bar", 
                 position = position_dodge(width = 0.9), 
                 alpha = .9,
                 color = "black",
                 size = 1) +
     geom_hline(yintercept = 50, linetype="dashed", 
               color = "black", size=.5) +
     geom_point(aes(color = kind,
                   fill = kind),
               position = position_jitterdodge(dodge.width = 0.8,
                                               jitter.width = 0.5,
                                               jitter.height = 0.0),
               shape = 21,
               size = 2,
               color = "grey30",
               alpha = .4) +
    stat_summary(fun.data = mean_cl_boot, 
                 geom = "linerange", 
                 position = position_dodge(width = 0.9),
                 color = "black",
                 size = 1.2) +
 scale_fill_manual(values = c("artifacts" = "#984EA3", "biological kinds" = "#FF7F00"), breaks=c("artifacts", "biological kinds")) +
  scale_color_manual(values = c("artifacts" = "#984EA3", "biological kinds" = "#FF7F00"), breaks=c("artifacts", "biological kinds")) +
   scale_y_continuous(breaks = seq(0, 100, 25),
                        labels = str_c(seq(0, 100, 25), "%"),
                       limits = c(0, 100)) +
    labs(y = "Probability of selecting \n\ taxonomic match") +
    theme(axis.title.x = element_blank(),
          axis.text.x = element_text(size = 14),
          axis.title.y = element_text(size=18),
          axis.text.y = element_text(size = 14),
          legend.title = element_blank(),
          legend.text = element_text(size = 14),
          legend.position = "bottom",
          plot.title = element_text(hjust = 0.5, face="bold", size=16, margin = margin(t = 20, b = 20))) +
    ggtitle("Artifacts vs. Biological Kinds")

p2

ggsave("../../figures/exp1/exp1_condition_kind_bar.pdf",
       height = 5, 
       width = 8)

5.6 Combine condition and kind plots

plot_basic = p1 + p2 &
plot_layout(nrow = 1) &
plot_annotation(title = "Basic-level categorization",
                tag_levels = "A") &
theme(plot.tag = element_text(face = "bold", size = 24),
      plot.title = element_text(face = "bold", hjust = 0.5, size = 24))

ggsave(plot_basic, filename = "../../figures/exp1/exp1_condition_kind_combined_bar.pdf", height = 5, width = 12)

plot_basic

6 EXPERIMENT 2 (Superordinate-level)

7 DATA

7.1 Read in data

df.exp2 = read.csv("../../data/exp2/exp2.csv")

7.2 Wrangle

df.exp2 = df.exp2 %>%
  filter(completed_all_trials == 1) %>% 
  mutate(condition = factor(condition,
                            levels = c("0", "1", "2"),
                            labels = c("label_only", "irrelevant", "purpose")),
         across(.cols = purse : flower, .fns = as.numeric)) %>%
   mutate(condition = relevel(condition, ref = "irrelevant")) %>% 
  pivot_longer(cols = (purse: flower),
               names_to = "item",
               values_to = "rating",
               values_drop_na = TRUE) %>%
  mutate(kind = case_when(item == "purse" ~ 0,
                          item == "shoe" ~ 0,
                          item == "plane" ~ 0,
                          item == "drum" ~ 0,
                          item == "quarter" ~ 0,
                          item == "apple" ~ 1,
                          item == "leaf_insect" ~ 1,
                          item == "flower" ~ 1)) %>%
  mutate(kind = factor(kind,
                       levels = c("0", "1"),
                       labels = c("artifact", "biological")))

# warm-up items
df.exp2.warmup = df.exp2 %>% 
  filter(item %in% c("purse", "shoe"))

#test items
df.exp2 = df.exp2 %>% 
  filter(!item %in% c("purse", "shoe"))

7.3 Demographics

  # gender
  df.exp2 %>%
    group_by(sex) %>%
    summarise(n = n_distinct(participant)) %>%
    print()
## # A tibble: 7 × 2
##   sex                      n
##   <chr>                <int>
## 1 0                       38
## 2 1                       46
## 3 Other                    1
## 4 Prefer not to answer     2
## 5 not specified            1
## 6 prefer not to answer     1
## 7 prefer not to aswer      1
  # language spoken
read.csv("../../data/exp2/exp2.csv") %>%
  clean_names() %>%
  mutate(native_english = ifelse(native_english == "-", NA,          as.numeric(native_english))) %>%
  summarize(sum(native_english, na.rm = TRUE))
##   sum(native_english, na.rm = TRUE)
## 1                                78

8 STATS

8.1 Bayesian Regression

# sum coding
options(contrasts = c("contr.sum","contr.poly"))

df.model = df.exp2 %>% 
  mutate(condition = as_factor(condition),
         condition= fct_relevel(condition, "purpose", "label_only", "irrelevant"))

# Check contrast values
 contrasts(df.model$condition)
##            [,1] [,2]
## purpose       1    0
## label_only    0    1
## irrelevant   -1   -1
fit.brm.superordinate_level = brm(formula = rating ~ 1 + condition + (1 | participant) + (1 | item), 
                     data = df.model,
                     family = "bernoulli",
                     seed = 1,
                     iter = 8000,
                     warmup = 1000,
                     cores = 8,
                     control = list(adapt_delta = .999999,
                                    max_treedepth = 15),
                     file = "cache/fit.brm.superordinate_level") 

fit.brm.superordinate_level
##  Family: bernoulli 
##   Links: mu = logit 
## Formula: rating ~ 1 + condition + (1 | participant) + (1 | item) 
##    Data: df.model (Number of observations: 540) 
##   Draws: 4 chains, each with iter = 8000; warmup = 1000; thin = 1;
##          total post-warmup draws = 28000
## 
## Multilevel Hyperparameters:
## ~item (Number of levels: 6) 
##               Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## sd(Intercept)     0.41      0.26     0.06     1.08 1.00     7926     8053
## 
## ~participant (Number of levels: 90) 
##               Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## sd(Intercept)     0.84      0.17     0.51     1.19 1.00    11201    15961
## 
## Regression Coefficients:
##            Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## Intercept     -0.09      0.23    -0.55     0.36 1.00    14374    15370
## condition1     0.90      0.19     0.53     1.29 1.00    19990    20055
## condition2    -0.33      0.19    -0.70     0.04 1.00    20651    20508
## 
## Draws were sampled using sampling(NUTS). For each parameter, Bulk_ESS
## and Tail_ESS are effective sample size measures, and Rhat is the potential
## scale reduction factor on split chains (at convergence, Rhat = 1).

8.2 Hypotheis test of condition differneces and differnece from chance

fit.brm.superordinate_level %>%
  emmeans(pairwise ~ condition)
## $emmeans
##  condition  emmean lower.HPD upper.HPD
##  purpose     0.811     0.236    1.4289
##  label_only -0.416    -0.997    0.1598
##  irrelevant -0.666    -1.256   -0.0861
## 
## Point estimate displayed: median 
## Results are given on the logit (not the response) scale. 
## HPD interval probability: 0.95 
## 
## $contrasts
##  contrast                estimate lower.HPD upper.HPD
##  purpose - label_only       1.227     0.593      1.89
##  purpose - irrelevant       1.474     0.840      2.15
##  label_only - irrelevant    0.247    -0.385      0.89
## 
## Point estimate displayed: median 
## Results are given on the log odds ratio (not the response) scale. 
## HPD interval probability: 0.95

8.3 Test of kind

fit.brm.superordinate_level.kind = brm(formula = rating ~ 1 + kind + (1 | participant) + (1 | item), 
                     data = df.model,
                     family = "bernoulli",
                     seed = 1,
                     iter = 8000,
                     warmup = 1000,
                     cores = 8,
                     control = list(adapt_delta = .999999,
                                    max_treedepth = 15),
                     file = "cache/fit.brm.superordinate_level.kind") 

fit.brm.superordinate_level.kind
##  Family: bernoulli 
##   Links: mu = logit 
## Formula: rating ~ 1 + kind + (1 | participant) + (1 | item) 
##    Data: df.model (Number of observations: 540) 
##   Draws: 4 chains, each with iter = 8000; warmup = 1000; thin = 1;
##          total post-warmup draws = 28000
## 
## Multilevel Hyperparameters:
## ~item (Number of levels: 6) 
##               Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## sd(Intercept)     0.36      0.31     0.02     1.15 1.00     6099     8895
## 
## ~participant (Number of levels: 90) 
##               Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## sd(Intercept)     1.06      0.17     0.73     1.42 1.00    12107    17357
## 
## Regression Coefficients:
##           Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## Intercept    -0.10      0.24    -0.57     0.40 1.00    11809    10272
## kind1        -0.23      0.22    -0.66     0.20 1.00    12651    10637
## 
## Draws were sampled using sampling(NUTS). For each parameter, Bulk_ESS
## and Tail_ESS are effective sample size measures, and Rhat is the potential
## scale reduction factor on split chains (at convergence, Rhat = 1).

#PLOT

8.4 Condition

df.exp2 = read.csv("../../data/exp2/exp2.csv")

df.plot = df.exp2 %>%
  filter(completed_all_trials == 1) %>% 
   mutate(across(.cols = plane : flower, .fns = as.numeric)) %>% 
  select(-purse, -shoe) %>%
    mutate(condition = factor(condition,
                            levels = c("0", "1", "2"),
                            labels = c("label only", "irrelevant", "purpose"))) %>% 
  mutate(across(.cols = plane :  flower, .fns = as.numeric)) %>%
   mutate(total = plane + drum + quarter + apple + leaf_insect + flower,
          percent = (total / 6) * 100) %>% 
  mutate(condition = factor(condition, levels = c("label only", "irrelevant", "purpose"))) %>% 
  select(participant, condition, percent) 

p3 = ggplot(data = df.plot,
         mapping = aes(x = condition,
                       y = percent,
                       group = condition,
                       color = condition,
                       fill = condition)) + 
    stat_summary(fun = mean, 
                 geom = "bar", 
                 position = position_dodge(width = 0.5), 
                 alpha = .9,
                 color = "black",
                 size = 1) +
     geom_hline(yintercept = 50, linetype="dashed", 
               color = "black", size=.5) +
     geom_point(aes(color = condition,
                   fill = condition),
               position = position_jitterdodge(dodge.width = 0.8,
                                               jitter.width = 0.5,
                                               jitter.height = 0.0),
               shape = 21,
               size = 2,
               color = "grey30",
               alpha = .4) +
    stat_summary(fun.data = mean_cl_boot, 
                 geom = "linerange", 
                 position = position_dodge(width = 0.5),
                 color = "black",
                 size = 1.2) +
 scale_fill_manual(values = c("label only" = "#377EB8", "irrelevant" = "#E41A1C", "purpose" = "#4DAF4A"), breaks=c("label only", "irrelevant", "purpose")) +
  scale_color_manual(values = c("label only" = "#377EB8", "irrelevant" = "#E41A1C", "purpose" = "#4DAF4A"), breaks=c("label only", "irrelevant", "purpose")) +
   scale_y_continuous(breaks = seq(0, 100, 25),
                        labels = str_c(seq(0, 100, 25), "%"),
                       limits = c(0, 100)) +
    labs(y = "Probability of selecting \n\ taxonomic match") +
    theme(axis.title.x = element_blank(),
          axis.title.y = element_text(size=18),
          axis.text.y = element_text(size = 14),
          legend.title = element_blank(),
          legend.text = element_text(size = 14),
          legend.position = "bottom",
          axis.text.x = element_blank(),
          plot.title = element_text(hjust = 0.5, face="bold", size=16, margin = margin(t = 20, b = 20))) +
    ggtitle("Overall")

p3

ggsave("../../figures/exp2/exp2_condition_bar.pdf",
       height = 5, 
       width = 8)

8.5 Kind by condition

df.exp2 = read.csv("../../data/exp2/exp2.csv")

df.plot = df.exp2 %>%
  filter(completed_all_trials == 1) %>% 
  mutate(across(.cols = plane : flower, .fns = as.numeric)) %>%
  mutate(condition = factor(condition,
                            levels = c("0", "1", "2"),
                            labels = c("label only", "irrelevant", "purpose"))) %>% 
  select(-purse, -shoe) %>%
  pivot_longer(cols = (plane: flower),
               names_to = "item",
               values_to = "rating",
               values_drop_na = TRUE) %>%
  relocate(participant, condition, item, rating, age) %>%
  select(participant: age) %>%
  mutate(kind = case_when( item == "plane" ~ 0,
                          item == "drum" ~ 0,
                          item == "quarter" ~ 0,
                          item == "apple" ~ 1,
                          item == "leaf_insect" ~ 1,
                          item == "flower" ~ 1)) %>%
  mutate(kind = factor(kind,
                       levels = c("0", "1"),
                       labels = c("artifacts", "biological kinds"))) %>% 
  group_by(participant, condition, kind) %>%
  summarise(total = sum(rating),
            percent = (total / 3) * 100) %>% 
  mutate(condition = factor(condition, levels = c("label only", "irrelevant", "purpose"))) %>% 
  select(participant, condition, kind, total, percent)



p4 = ggplot(data = df.plot,
         mapping = aes(x = condition,
                       y = percent,
                       group = kind,
                       color = kind,
                       fill = kind)) + 
    stat_summary(fun = mean, 
                 geom = "bar", 
                 position = position_dodge(width = 0.9), 
                 alpha = .9,
                 color = "black",
                 size = 1) +
     geom_hline(yintercept = 50, linetype="dashed", 
               color = "black", size=.5) +
     geom_point(aes(color = kind,
                   fill = kind),
               position = position_jitterdodge(dodge.width = 0.8,
                                               jitter.width = 0.5,
                                               jitter.height = 0.0),
               shape = 21,
               size = 2,
               color = "grey30",
               alpha = .4) +
    stat_summary(fun.data = mean_cl_boot, 
                 geom = "linerange", 
                 position = position_dodge(width = 0.9),
                 color = "black",
                 size = 1.2) +
 scale_fill_manual(values = c("artifacts" = "#984EA3", "biological kinds" = "#FF7F00"), breaks=c("artifacts", "biological kinds")) +
  scale_color_manual(values = c("artifacts" = "#984EA3", "biological kinds" = "#FF7F00"), breaks=c("artifacts", "biological kinds")) +
   scale_y_continuous(breaks = seq(0, 100, 25),
                        labels = str_c(seq(0, 100, 25), "%"),
                       limits = c(0, 100)) +
    labs(y = "Probability of selecting \n\ taxonomic match") +
    theme(axis.title.x = element_blank(),
          axis.text.x = element_text(size = 14),
          axis.text.y = element_text(size = 14),
          axis.title.y = element_text(size=18),
          legend.title = element_blank(),
          legend.text = element_text(size = 14),
          legend.position = "bottom",
          plot.title = element_text(hjust = 0.5, face="bold", size=16, margin = margin(t = 20, b = 20))) +
    ggtitle("Artifacts vs. Biological Kinds")

p4

ggsave("../../figures/exp2/exp2_condition_kind_bar.pdf",
       height = 5, 
       width = 8)

8.6 Combine condition and kind plots

plot_super = p3 + p4 &
plot_layout(nrow = 1) &
plot_annotation(title = "Superordinate-level categorization",
                tag_levels = "A") &
theme(plot.tag = element_text(face = "bold", size = 24),
      plot.title = element_text(face = "bold", hjust = 0.5, size = 24))

ggsave(plot_super, filename = "../../figures/exp2/exp2_condition_kind_combined_bar.pdf", height = 5, width = 12)

plot_super

#EXPERIMENT 3 (Norming) #PART 1 (What is this for?) #DATA ## Read in data

df.exp3a = read_csv("../../data/exp3/exp3_part1.csv")

8.7 Wrangle

df.exp3a = df.exp3a %>% 
  select(participant, age, sink:shirt) %>% 
  pivot_longer(., cols = sink:shirt,
               names_to = "item",
               values_to = "text")

8.8 Demographics

read_csv("../../data/exp3/exp3_part1.csv")%>% 
  group_by(gender) %>% 
  count()
## # A tibble: 5 × 2
## # Groups:   gender [5]
##   gender     n
##   <chr>  <int>
## 1 N/A        1
## 2 female    28
## 3 male      14
## 4 na         1
## 5 <NA>       1
read_csv("../../data/exp3/exp3_part1.csv") %>% 
  group_by(native_english) %>% 
  count()
## # A tibble: 3 × 2
## # Groups:   native_english [3]
##   native_english     n
##            <dbl> <int>
## 1              0     8
## 2              1    30
## 3             NA     7

#PLOTS

8.8.1 Probability of top 10 words for all items

# 1 grams
df.1gram = df.exp3a %>% 
  unnest_tokens(word, text) %>%
  anti_join(stop_words) 

df.plot = df.1gram %>% 
  group_by(item) %>% 
  count(word, sort = TRUE) %>% 
  group_by(item) %>% 
  mutate(item_total = sum(n),
         probability = round(n/item_total * 100, 2),
         rank = row_number(desc(probability))) %>%
  filter(rank <= 10) %>%
  mutate(word = reorder_within(word, probability, item)) %>%
  ungroup() %>%
  drop_na() %>% 
   separate(word, into = c("word1", "word2"), sep = "___") %>%
  select(-word2) %>% 
  rename(word = word1)

  

ggplot(data = df.plot, aes(x = word, y = probability)) +
  geom_hline(yintercept = 50, linetype = "dashed", color = "red") +
  geom_hline(yintercept = 25, linetype = "dashed", color = "blue") +
  geom_col(position = "dodge") +
  facet_wrap(~ item, scales = "free_x") +
  ggtitle("Probability of Words by Item") +
  scale_y_continuous(breaks = seq(0, 100, 25),
                     labels = str_c(seq(0, 100, 25), "%"),
                     limits = c(0, 100)) +
  theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1))

ggsave(width = 20, height = 20, "../../figures/exp3/part1/exp3a_topwords_by_item_probability.pdf")

8.8.2 One gram probability of selected items

# 1 grams
df.1gram_selected = df.exp3a %>% 
  unnest_tokens(word, text) %>%
  anti_join(stop_words) 

df.plot = df.1gram_selected %>% 
  na.omit() %>%
  group_by(item) %>% 
  count(word, sort = TRUE) %>% 
  group_by(item) %>% 
  mutate(item_total = sum(n),
         probability = round(n/item_total * 100, 2),
         rank = row_number(desc(probability))) %>%
  filter(rank <= 10) %>%
  mutate(word = reorder_within(word, probability, item)) %>%
  ungroup() %>%
  drop_na() %>% 
   separate(word, into = c("word1", "word2"), sep = "___") %>%
  select(-word2) %>% 
  rename(word = word1) %>% 
  filter(item %in% c("chicken", "monkey", "sink", "toilet", "lion", "cow", "birdfeeder", "coffeepot", "bee", "spider", "lamp", "umbrella", "skunk", "squirrel", "plate", "clock"))

df.plot.artifacts = df.plot %>% 
  filter(item %in% c("sink", "toilet", "birdfeeder", "coffeepot", "lamp", "umbrella", "plate", "clock"))

df.plot.animals = df.plot %>% 
  filter(item %in% c("chicken", "monkey", "lion", "cow", "bee", "spider", "skunk", "squirrel"))


p5 = ggplot(data = df.plot.artifacts, aes(x = word,
                           y = probability)) +
  geom_col(position = "dodge",
            color = "black",
                           fill = "#984EA3") +
  facet_wrap(~ item, scales = "free_x",
             nrow = 2) +
  scale_y_continuous(limits = c(0, 100), 
                     expand = c(0, 0),
                     labels = str_c(seq(0, 100, 25), "%")) +
   labs(y = "Probability of word") +
  geom_hline(yintercept = 50, linetype = "dashed", color = "black") +
  theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1),
        axis.title.x = element_blank(),
        plot.title = element_text(hjust = 0.5, face="bold", size=20, margin = margin(t = 20, b = 20))) +
    ggtitle("Top Artifacts")


p6 = ggplot(data = df.plot.animals, aes(x = word,
                           y = probability)) +
  geom_col(position = "dodge",
            color = "black",
                           fill = "#FF7F00") +
  facet_wrap(~ item, scales = "free_x",
             nrow = 2) +
  scale_y_continuous(limits = c(0, 100), 
                     expand = c(0, 0),
                     labels = str_c(seq(0, 100, 25), "%")) +
   labs(y = "Probability of word") +
  geom_hline(yintercept = 50, linetype = "dashed", color = "black") +
  theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1),
        axis.title.x = element_blank(),
        plot.title = element_text(hjust = 0.5, face="bold", size=20, margin = margin(t = 20, b = 20))) +
    ggtitle("Top Biological Kinds")

8.8.3 Combine artifact and biological kind select item plots

plot_word = p5 + p6 + 
  plot_layout(nrow = 1) 

ggsave(plot_word, filename = "../../figures/exp3/part1/exp3a_item_kind_combined_bar.pdf", height = 5, width = 12)

plot_word

9 PART 2 (What one X or Y, is for this?)

9.1 Read in data

df.exp3b = read_csv("../../data/exp3/exp3_part2.csv")

9.2 Wrangle

df.exp3b = df.exp3b %>%
  filter(participant %in% (1:40)) %>% 
  select(participant, gender, language, make_honey:protect_us_from_rain) %>% 
  pivot_longer(cols = make_honey:protect_us_from_rain,
               names_to = "item",
               values_to = "response") %>% 
  na.omit()

9.3 Demographics

  # gender
read_csv("../../data/exp3/exp3_part2.csv") %>% 
  filter(participant %in% (1:40)) %>%
  group_by(gender) %>%
  summarise(n = n_distinct(participant)) %>%
  print()
## # A tibble: 2 × 2
##   gender     n
##   <chr>  <int>
## 1 female    21
## 2 male      19
  # language spoken
read_csv("../../data/exp3/exp3_part2.csv") %>% 
  filter(participant %in% (1:40)) %>%
  mutate(en_occurrence = ifelse(str_detect(language, "en"), "en", "non-en")) %>%
  group_by(en_occurrence) %>%
  summarise(n = n_distinct(participant)) %>%
  print()
## # A tibble: 2 × 2
##   en_occurrence     n
##   <chr>         <int>
## 1 en               39
## 2 non-en            1

#PLOTS ## Correct Selections for Artifacts and Biological Kinds

df.plot = df.exp3b %>% 
  filter(item %in% c("make_honey", "feed_birds", "lay_eggs", "tell_time", "hold_coffee", "make_milk", "provide_light", "roar", "swing_from_trees", "eat_on", "wash_hands", "smell_stinky", "make_webs", "eat_nuts", "go_to_bathroom", "protect_us_from_rain")) %>% 
  mutate(item = as.factor(item),
         item = recode(item, 
                       "make_honey" = "bee",
                       "feed_birds" = "bird feeder",
                       "lay_eggs" = "chicken",
                       "tell_time" = "clock",
                       "hold_coffee" = "coffee pot",
                       "make_milk" = "cow",
                       "provide_light" = "lamp",
                       "roar" = "lion",
                       "swing_from_trees" = "monkey",
                       "eat_on" = "plate",
                       "wash_hands" = "sink",
                       "smell_stinky" = "skunk",
                       "make_webs" = "spider",
                       "eat_nuts" = "squirrel",
                       "go_to_bathroom" = "toilet",
                       "protect_us_from_rain" = "umbrella")) 

df.plot.artifacts = df.plot %>% 
  filter(item %in% c("bird feeder", "clock", "coffee pot", "lamp", "plate", "sink", "toilet", "umbrella")) 

df.plot.animals = df.plot %>%
  filter(item %in% c("bee", "chicken", "cow", "lion", "monkey", "skunk", "spider", "squirrel"))



p7 = ggplot(data = df.plot.artifacts,
       mapping = aes(x = item, 
                     y = response)) +
   stat_summary(fun = mean, 
                 geom = "bar", 
                 position = position_dodge(width = 0.5), 
                 alpha = 0.8,
                 color = "black",
                fill = "#984EA3",
                size = 1.5) +
    stat_summary(fun.data = mean_cl_boot, 
                 geom = "linerange", 
                 position = position_dodge(width = 0.5),
                 color = "black",
                 size = .7) +
    # scale_fill_manual(values = my_palette) +
    # scale_color_manual(values = my_palette) +
   scale_y_continuous(breaks = seq(0, 1, .25),
                        labels = str_c(seq(0, 100, 25), "%"),
                       limits = c(0, 1)) +
    labs(y = "Probability of selecting \n\ object with purpose") +
    theme(axis.title.x = element_blank(),
          axis.title.y = element_text(size=14),
          legend.title = element_blank(),
          plot.title = element_text(hjust = 0.5, face="bold", size=20, margin = margin(t = 20, b = 20))) +
    ggtitle("Top Artifacts")



p8 = ggplot(data = df.plot.animals,
       mapping = aes(x = item, 
                     y = response)) +
   stat_summary(fun = mean, 
                 geom = "bar", 
                 position = position_dodge(width = 0.5), 
                 alpha = 0.8,
                 color = "black",
                fill = "#FF7F00",
                size = 1.5) +
    stat_summary(fun.data = mean_cl_boot, 
                 geom = "linerange", 
                 position = position_dodge(width = 0.5),
                 color = "black",
                 size = .7) +
    # scale_fill_manual(values = my_palette) +
    # scale_color_manual(values = my_palette) +
   scale_y_continuous(breaks = seq(0, 1, .25),
                        labels = str_c(seq(0, 100, 25), "%"),
                       limits = c(0, 1)) +
    labs(y = "Probability of selecting \n\ object with purpose") +
    theme(axis.title.x = element_blank(),
          axis.title.y = element_text(size=14),
          legend.title = element_blank(),
          plot.title = element_text(hjust = 0.5, face="bold", size=20, margin = margin(t = 20, b = 20))) +
    ggtitle("Top Biological Kinds")

9.4 Combine artifact and biological kind plots

plot_correct = p7 + p8 + 
  plot_layout(nrow = 1) 

ggsave(plot_correct, filename = "../../figures/exp3/part2/exp3b_item_kind_combined_bar.pdf", height = 5, width = 12)

plot_correct

10 EXPERIMENT 3 (Transformation)

10.1 Read in data

df.exp4 = read_csv("../../data/exp4/exp4.csv")

10.2 Wrangle

df.exp4 = df.exp4 %>%
  select(participant_number, age_exact, condition, lion:plate) %>%
  pivot_longer(cols = lion:plate,
               names_to = "item",
               values_to = "response") %>%
  mutate(kind = ifelse(item %in% c("bee", "skunk", "lion"), "biological kinds", "artifacts"),
         age_whole = as.integer(age_exact),
         condition = str_replace_all(condition, "keil", "control")) %>% 
  rename(age = age_exact) 

10.3 Demographics

  # gender
read_csv("../../data/exp4/exp4.csv") %>%
  group_by(gender) %>%
  summarise(n = n_distinct(participant_number)) %>%
  print()
## # A tibble: 3 × 2
##   gender     n
##   <chr>  <int>
## 1 f        156
## 2 m        143
## 3 <NA>       1
  # language spoken
read_csv("../../data/exp4/exp4.csv") %>%
  mutate(en_occurrence = ifelse(str_detect(lan_spoken, "en"), "en", "non-en")) %>%
  group_by(en_occurrence) %>%
  summarise(n = n_distinct(participant_number)) %>%
  print()
## # A tibble: 2 × 2
##   en_occurrence     n
##   <chr>         <int>
## 1 en              274
## 2 <NA>             26

11 STATS

11.1 Bayesian Models

11.1.1 Hypothesis 1

df.artifacts = df.exp4 %>% 
  filter(kind == "artifacts")

fit.brm.artifacts = brm(formula = response ~ 1 + condition*age + (1 | participant_number) + (1 | item), 
                     data = df.artifacts,
                     family = "bernoulli",
                     seed = 1,
                     iter = 4000,
                     warmup = 1000,
                     control = list(adapt_delta = .9999,
                                    max_treedepth = 15),
                     cores = 8,
                     file = "cache/fit.brm.artifacts")

fit.brm.artifacts
##  Family: bernoulli 
##   Links: mu = logit 
## Formula: response ~ 1 + condition * age + (1 | participant_number) + (1 | item) 
##    Data: df.artifacts (Number of observations: 900) 
##   Draws: 4 chains, each with iter = 4000; warmup = 1000; thin = 1;
##          total post-warmup draws = 12000
## 
## Multilevel Hyperparameters:
## ~item (Number of levels: 3) 
##               Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## sd(Intercept)     0.48      0.69     0.01     2.33 1.00     3336     4405
## 
## ~participant_number (Number of levels: 300) 
##               Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## sd(Intercept)     4.38      0.61     3.33     5.69 1.00     2788     4831
## 
## Regression Coefficients:
##                      Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS
## Intercept              -16.49      4.18   -25.56    -9.15 1.00     2358
## conditionpurpose        10.76      4.50     2.56    20.50 1.00     2214
## age                      1.34      0.49     0.47     2.38 1.00     2390
## conditionpurpose:age    -0.55      0.55    -1.71     0.48 1.00     2246
##                      Tail_ESS
## Intercept                3672
## conditionpurpose         3711
## age                      3395
## conditionpurpose:age     3749
## 
## Draws were sampled using sampling(NUTS). For each parameter, Bulk_ESS
## and Tail_ESS are effective sample size measures, and Rhat is the potential
## scale reduction factor on split chains (at convergence, Rhat = 1).

11.1.2 Hypothesis 2

df.biological = df.exp4 %>% 
  filter(kind == "biological kinds")

fit.brm.biological = brm(formula = response ~ 1 + condition*age + (1 | participant_number) + (1 | item), 
                     data = df.biological,
                     family = "bernoulli",
                     seed = 1,
                     iter = 4000,
                     warmup = 1000,
                    control = list(adapt_delta = .9999,
                                    max_treedepth = 15),
                     cores = 8,
                     file = "cache/fit.brm.biological")

fit.brm.biological
##  Family: bernoulli 
##   Links: mu = logit 
## Formula: response ~ 1 + condition * age + (1 | participant_number) + (1 | item) 
##    Data: df.biological (Number of observations: 900) 
##   Draws: 4 chains, each with iter = 4000; warmup = 1000; thin = 1;
##          total post-warmup draws = 12000
## 
## Multilevel Hyperparameters:
## ~item (Number of levels: 3) 
##               Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## sd(Intercept)     0.54      0.74     0.01     2.55 1.00     3402     3704
## 
## ~participant_number (Number of levels: 300) 
##               Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## sd(Intercept)     5.18      0.73     3.92     6.78 1.00     3238     5594
## 
## Regression Coefficients:
##                      Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS
## Intercept              -16.18      4.15   -25.01    -8.68 1.00     2307
## conditionpurpose        10.78      4.72     1.98    20.67 1.00     2219
## age                      1.31      0.49     0.38     2.34 1.00     2316
## conditionpurpose:age    -0.60      0.59    -1.80     0.53 1.00     2194
##                      Tail_ESS
## Intercept                3717
## conditionpurpose         3418
## age                      3583
## conditionpurpose:age     3749
## 
## Draws were sampled using sampling(NUTS). For each parameter, Bulk_ESS
## and Tail_ESS are effective sample size measures, and Rhat is the potential
## scale reduction factor on split chains (at convergence, Rhat = 1).

11.1.3 Hypothesis 3

df.purpose = df.exp4 %>% 
  filter(condition == "purpose")

fit.brm.purpose = brm(formula = response ~ 1 + kind*age + (1 | participant_number) + (1 | item), 
                     data = df.purpose,
                     family = "bernoulli",
                     seed = 1,
                     iter = 4000,
                     warmup = 1000,
                    control = list(adapt_delta = .9999,
                                    max_treedepth = 15),
                     cores = 8,
                     file = "cache/fit.brm.purpose")

fit.brm.purpose
##  Family: bernoulli 
##   Links: mu = logit 
## Formula: response ~ 1 + kind * age + (1 | participant_number) + (1 | item) 
##    Data: df.purpose (Number of observations: 900) 
##   Draws: 4 chains, each with iter = 4000; warmup = 1000; thin = 1;
##          total post-warmup draws = 12000
## 
## Multilevel Hyperparameters:
## ~item (Number of levels: 6) 
##               Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## sd(Intercept)     0.41      0.35     0.02     1.31 1.00     3587     4274
## 
## ~participant_number (Number of levels: 150) 
##               Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## sd(Intercept)     4.96      0.64     3.85     6.37 1.00     2553     5288
## 
## Regression Coefficients:
##                         Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS
## Intercept                  -6.85      2.66   -12.34    -1.90 1.00     1618
## kindbiologicalkinds         1.11      1.45    -1.77     4.00 1.00     6157
## age                         0.90      0.35     0.26     1.61 1.00     1582
## kindbiologicalkinds:age    -0.18      0.18    -0.54     0.17 1.00     6422
##                         Tail_ESS
## Intercept                   2759
## kindbiologicalkinds         7112
## age                         2839
## kindbiologicalkinds:age     7116
## 
## Draws were sampled using sampling(NUTS). For each parameter, Bulk_ESS
## and Tail_ESS are effective sample size measures, and Rhat is the potential
## scale reduction factor on split chains (at convergence, Rhat = 1).

11.1.4 Hypothesis 4

df.control = df.exp4 %>% 
  filter(condition == "control")

fit.brm.control = brm(formula = response ~ 1 + kind*age + (1 | participant_number) + (1 | item), 
                     data = df.control,
                     family = "bernoulli",
                     seed = 1,
                     iter = 4000,
                     warmup = 1000,
                    control = list(adapt_delta = .9999,
                                    max_treedepth = 15),
                     cores = 8,
                     file = "cache/fit.brm.control")

fit.brm.control
##  Family: bernoulli 
##   Links: mu = logit 
## Formula: response ~ 1 + kind * age + (1 | participant_number) + (1 | item) 
##    Data: df.control (Number of observations: 900) 
##   Draws: 4 chains, each with iter = 4000; warmup = 1000; thin = 1;
##          total post-warmup draws = 12000
## 
## Multilevel Hyperparameters:
## ~item (Number of levels: 6) 
##               Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## sd(Intercept)     0.42      0.41     0.02     1.48 1.00     4462     6352
## 
## ~participant_number (Number of levels: 150) 
##               Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## sd(Intercept)     4.31      0.77     3.04     6.03 1.00     2810     4600
## 
## Regression Coefficients:
##                         Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS
## Intercept                 -15.11      3.90   -23.68    -8.14 1.00     3522
## kindbiologicalkinds         0.79      2.33    -3.80     5.46 1.00     9500
## age                         1.14      0.45     0.30     2.11 1.00     3474
## kindbiologicalkinds:age     0.03      0.28    -0.54     0.58 1.00     9047
##                         Tail_ESS
## Intercept                   5570
## kindbiologicalkinds         7919
## age                         5399
## kindbiologicalkinds:age     7941
## 
## Draws were sampled using sampling(NUTS). For each parameter, Bulk_ESS
## and Tail_ESS are effective sample size measures, and Rhat is the potential
## scale reduction factor on split chains (at convergence, Rhat = 1).

12 PLOTS

12.1 Bayesian Logistic Regression + Means for Purpose and Control Across Age

# df.data = expand_grid(age = seq(4, 9, 0.1),
#                       kind = c("artifacts", "biological kinds"))
# 
# df.prediction = fit.brm.purpose %>%
#   fitted(newdata = df.data,
#          re_formula = NA,
#           probs = c(0.1, 0.9)) %>%
#   as_tibble() %>%
#   bind_cols(df.data) %>%
#   mutate(type = "purpose") %>%
#   bind_rows(fit.brm.control %>%
#               fitted(newdata = df.data,
#                      re_formula = NA,
#                      probs = c(0.1, 0.9)) %>%
#               as_tibble() %>%
#               bind_cols(df.data) %>%
#               mutate(type = "control")) %>%
#   clean_names()
# 
# df.means = df.exp4 %>%
#   dplyr::group_by(age_whole, kind, condition) %>%
#   dplyr::summarize(
#     response = Hmisc::smean.cl.boot(response)) %>%
#    mutate(index = c("response", "low", "high")) %>%
#     ungroup() %>%
#     pivot_wider(names_from = index,
#                 values_from = response) %>%
#   mutate(age_whole = as.numeric(as.character(age_whole)),
#          age_whole = age_whole + 0.5) %>%
#   filter(age_whole != 0.5)
# 
# 
# ggplot(mapping = aes(x = age,
#                      y = response,
#                      color = kind)) +
#   geom_hline(yintercept=0.50, linetype="dashed", color = "black") +
#   geom_smooth(data = df.prediction,
#               mapping = aes(y = estimate,
#                             ymin = q10,
#                             ymax = q90,
#                             color = question,
#                             fill = question),
#               stat = "identity",
#               alpha = 0.2) +
#   geom_pointrange(data = df.means,
#                   mapping = aes(x = age_whole,
#                                 y = response,
#                                 ymin = low,
#                                 ymax = high,
#                                 fill = question,
#                                 shape = question),
#                   color = "black",
#                   size = 0.5,
#                   show.legend = FALSE,
#                   alpha = 0.8,
#                   position = position_dodge(width = 0.5)) +
#  scale_fill_manual(values = c("artifacts" = "#984EA3", "biological kinds" = "#FF7F00"), breaks=c("artifacts", "biological kinds")) +
#   scale_color_manual(values = c("artifacts" = "#984EA3", "biological kinds" = "#FF7F00"), breaks=c("artifacts", "biological kinds"))
#   scale_shape_manual(values = c(21, 23)) +
#   scale_y_continuous(breaks = seq(0, 1, 0.25),
#                       labels = str_c(seq(0, 1, 0.25) * 100, "%")) +
#   scale_x_continuous(breaks = 5:10,
#                      labels = c("5", "6", "7", "8", "9", "10")) +
#     labs(y = "Probability of selecting \n original thing") +  theme(axis.title.y = element_text(size=12),
#           legend.title = element_blank(),
#           legend.position = "bottom",
#           plot.title = element_text(hjust = 0.5,
#                                     face="bold",
#                                     size=20,
#                                     margin = margin(t = 20, b = 20))) +
#     ggtitle("Artifacts vs. Biological Kinds \n in Control and Purpose Conditions") +
#   facet_wrap(~condition)

12.1.1 GLM + Means

df.exp4 = df.exp4 %>% 
  mutate(age_whole = as.factor(age_whole),
         kind = as.factor(kind))

df.points = df.exp4 %>% 
  group_by(participant_number, age, kind, condition) %>%
  summarise(response = mean(response)) 


df.means = df.exp4 %>% 
  dplyr::group_by(age_whole, kind, condition) %>% 
  dplyr::summarize(
    response = Hmisc::smean.cl.boot(response)) %>% 
   mutate(index = c("response", "low", "high")) %>%
    ungroup() %>% 
    pivot_wider(names_from = index,
                values_from = response) %>% 
  mutate(age_whole = as.numeric(as.character(age_whole)),
         age_whole = age_whole + 0.5) %>% 
  filter(age_whole != 0.5)


ggplot(data = df.exp4,
       mapping = aes(x = age, 
                     y = response,
                     color = kind,
                     fill = kind)) + 
   geom_hline(yintercept=0.50, linetype="dashed", color = "black") +
  #add points from df.points
  geom_point(data = df.points,
             mapping = aes(x = age,
                           y = response,
                           color = kind,
                           shape = kind),
             alpha = 0.2,
             size = 2,
             position = position_jitterdodge(dodge.width = 0.8,
                                             jitter.width = 0.5,
                                             jitter.height = 0.0)) +
  stat_smooth(method = "glm",
              method.args = list(family = "binomial")) +
   geom_pointrange(data = df.means,
                  mapping = aes(x = age_whole,
                                y = response,
                                ymin = low,
                                ymax = high,
                                fill = kind,
                                shape = kind),
                  color = "black",
                  size = 0.5,
                  show.legend = FALSE,
                  alpha = 0.8,
                  position = position_dodge(width = 0.25)) +
  scale_fill_manual(values = c("artifacts" = "#984EA3", "biological kinds" = "#FF7F00"), breaks=c("artifacts", "biological kinds")) +
  scale_color_manual(values = c("artifacts" = "#984EA3", "biological kinds" = "#FF7F00"), breaks=c("artifacts", "biological kinds")) +
  scale_shape_manual(values = c(21, 23)) +
  labs(y = "Probability of selecting \n original thing") +
   scale_x_continuous(limits=c(4.9, 10),
                     breaks = 5:10) +
  scale_y_continuous(limits = c(0, 1),  
                     breaks = seq(0, 1, 0.25),
                     labels = str_c(seq(0, 1, 0.25) * 100, "%")) + 
    theme(axis.title.y = element_text(size=12),
          legend.title = element_blank(),
          legend.position = "bottom",
          plot.title = element_text(hjust = 0.5, 
                                    face="bold", 
                                    size=20, 
                                    margin = margin(t = 20, b = 20))) +
    ggtitle("Artifacts vs. Biological Kinds \n in Control and Purpose Conditions") +
  facet_wrap(~condition)

ggsave(height = 6, width = 8, "../../figures/exp4/exp4.pdf")

13 SESSION INFO

sessionInfo()
## R version 4.3.3 (2024-02-29)
## Platform: aarch64-apple-darwin20 (64-bit)
## Running under: macOS Ventura 13.2
## 
## Matrix products: default
## BLAS:   /Library/Frameworks/R.framework/Versions/4.3-arm64/Resources/lib/libRblas.0.dylib 
## LAPACK: /Library/Frameworks/R.framework/Versions/4.3-arm64/Resources/lib/libRlapack.dylib;  LAPACK version 3.11.0
## 
## locale:
## [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
## 
## time zone: America/New_York
## tzcode source: internal
## 
## attached base packages:
## [1] stats     graphics  grDevices utils     datasets  methods   base     
## 
## other attached packages:
##  [1] lubridate_1.9.3 forcats_1.0.0   stringr_1.5.1   dplyr_1.1.4    
##  [5] purrr_1.0.2     readr_2.1.5     tidyr_1.3.1     tibble_3.2.1   
##  [9] ggplot2_3.5.1   tidyverse_2.0.0 patchwork_1.2.0 janitor_2.2.0  
## [13] brms_2.21.0     Rcpp_1.0.12     knitr_1.45      emmeans_1.10.1 
## [17] tidytext_0.4.1 
## 
## loaded via a namespace (and not attached):
##  [1] gridExtra_2.3        inline_0.3.19        rlang_1.1.3         
##  [4] magrittr_2.0.3       snakecase_0.11.1     matrixStats_1.2.0   
##  [7] compiler_4.3.3       mgcv_1.9-1           loo_2.7.0           
## [10] systemfonts_1.0.6    vctrs_0.6.5          reshape2_1.4.4      
## [13] pkgconfig_2.0.3      crayon_1.5.2         fastmap_1.1.1       
## [16] backports_1.4.1      labeling_0.4.3       utf8_1.2.4          
## [19] rmarkdown_2.26       tzdb_0.4.0           ragg_1.3.0          
## [22] bit_4.0.5            xfun_0.43            cachem_1.0.8        
## [25] jsonlite_1.8.8       tinytable_0.2.1      SnowballC_0.7.1     
## [28] highr_0.10           parallel_4.3.3       cluster_2.1.6       
## [31] R6_2.5.1             bslib_0.7.0          stringi_1.8.4       
## [34] StanHeaders_2.32.6   rpart_4.1.23         jquerylib_0.1.4     
## [37] estimability_1.5     bookdown_0.38        rstan_2.32.6        
## [40] base64enc_0.1-3      bayesplot_1.11.1     splines_4.3.3       
## [43] Matrix_1.6-5         nnet_7.3-19          timechange_0.3.0    
## [46] tidyselect_1.2.1     rstudioapi_0.16.0    abind_1.4-5         
## [49] yaml_2.3.8           codetools_0.2-19     pkgbuild_1.4.4      
## [52] lattice_0.22-5       plyr_1.8.9           withr_3.0.0         
## [55] bridgesampling_1.1-2 posterior_1.5.0      coda_0.19-4.1       
## [58] evaluate_0.23        foreign_0.8-86       RcppParallel_5.1.7  
## [61] pillar_1.9.0         janeaustenr_1.0.0    tensorA_0.36.2.1    
## [64] checkmate_2.3.1      stats4_4.3.3         distributional_0.4.0
## [67] generics_0.1.3       vroom_1.6.5          hms_1.1.3           
## [70] rstantools_2.4.0     munsell_0.5.1        scales_1.3.0        
## [73] xtable_1.8-4         glue_1.7.0           Hmisc_5.1-2         
## [76] tools_4.3.3          diffobj_0.3.5        data.table_1.15.4   
## [79] tokenizers_0.3.0     mvtnorm_1.2-4        grid_4.3.3          
## [82] QuickJSR_1.1.3       colorspace_2.1-0     nlme_3.1-164        
## [85] htmlTable_2.4.2      Formula_1.2-5        cli_3.6.2           
## [88] textshaping_0.3.7    fansi_1.0.6          Brobdingnag_1.2-9   
## [91] gtable_0.3.5         sass_0.4.9           digest_0.6.35       
## [94] htmlwidgets_1.6.4    farver_2.1.2         htmltools_0.5.8.1   
## [97] lifecycle_1.0.4      bit64_4.0.5